*SM-E12

* Session setup: default graph scheme, interpreter version, analysis data.
* NOTE(review): the stcolor scheme and the stc# colours used in the graphs
* appear to ship with Stata 18 - confirm the release this is run on has them.
set scheme stcolor
version 17
* load the analysis dataset, discarding whatever is currently in memory
use "../Metadata/tess_analysisdata.dta", clear

********************************************************************************
* outcome variables

	* Build hyp_true: 1 = hypothesis supported, 0 = not supported.
	*
	* First remove outcome variables that may already be stored in the dataset.
	* They are dropped one at a time: a single -capture drop- on the whole list
	* aborts without dropping anything as soon as one of the names is absent,
	* which would leave the others in place and make the -gen- below fail.
	foreach v in hyp_true hyp_true_insample hyp_anytrue_insample ///
			total_hyp_true_insample hyp_10pct_supp_insample successfulexp_insample {
		capture drop `v'
	}

	* Supported: two-sided p below 0.05 and rightdir is "yes" or empty
	* (presumably: effect in the hypothesized direction, or no direction
	* recorded - confirm against the codebook).
	gen hyp_true=1 if twop<0.05 & inlist(rightdir,"yes","")

	* Not supported: p at or above 0.05, or significant with rightdir "no".
	* NOTE(review): Stata orders missing above every number, so a missing twop
	* satisfies twop>=0.05 and is coded 0 here - confirm that is intended.
	replace hyp_true=0 if twop>=0.05
	replace hyp_true=0 if twop<0.05 & rightdir=="no"
	

* Flag per vendor_id group: was at least one in-sample hypothesis supported?
	* copy of hyp_true that is missing on every row outside the sample
	generate hyp_true_insample = cond(insample==1, hyp_true, .)

	* group maximum: 1 as soon as one in-sample row in the group has hyp_true==1
	egen hyp_anytrue_insample = max(hyp_true_insample), by(vendor_id)
	label variable hyp_anytrue_insample "At least 1 hypothesis supported"


* At least 10% of the in-sample hypotheses in a vendor_id group supported
	* Number of supported in-sample hypotheses per group. total() is the
	* documented egen function; it replaces the legacy, undocumented sum().
	egen total_hyp_true_insample=total(hyp_true_insample), by(vendor_id)

	* 10% threshold. total_hyp_insample is not created in this section and is
	* expected to already be in the dataset. The threshold lives in a tempvar
	* so it cannot collide with (or later drop) an existing variable named temp.
	* It is deliberately stored with -gen-'s default float type: comparisons at
	* an exact 10% boundary can differ if the product is compared in double.
	tempvar threshold
	gen `threshold'=total_hyp_insample*0.1

	* 1 = threshold reached, 0 = not reached, missing when the threshold is missing
	gen hyp_10pct_supp_insample=1 if total_hyp_true_insample>=`threshold' & total_hyp_true_insample!=.
	replace hyp_10pct_supp_insample=0 if total_hyp_true_insample<`threshold' & `threshold'!=.
	drop `threshold'

	sum hyp_10pct_supp_insample
	lab var hyp_10pct_supp_insample "At least 10% hypotheses supported"
	
* Successful study: with 10 or fewer hypotheses at least one must be supported;
* with more than 10, at least 10% must be supported
	* Rows with a missing total_hyp_insample take the 10% flag, because Stata
	* orders missing above every number (missing<=10 is false).
	generate successfulexp_insample = cond(total_hyp_insample<=10, hyp_anytrue_insample, hyp_10pct_supp_insample)

	label variable successfulexp_insample "Successful study"
	
************************************

* Median sample size within each sample-size category.
* Runs on a temporary copy restricted to rows with hyp_num==1 (presumably one
* row per study - confirm); the full data are restored afterwards, so
* samplesizecatmed does not persist.
preserve
	keep if hyp_num==1
	egen samplesizecatmed = median(samplesize), by(samplesizecat)
	* cross-tabulation shows which median belongs to which category
	tabulate samplesizecat samplesizecatmed
restore

* Results frame: one row per sample-size band.
* Columns: varname (row label), mean, se, ll, ul, N (text of the form "N=#").
* -frame post- fills the columns positionally, and every post in this script
* supplies the lower bound before the upper bound, so ll must be declared
* before ul for the bounds to be stored under their own names.
* Both -capture- lines make the script re-runnable: return to the default
* frame if a previous run stopped elsewhere, and discard an old results frame.
cap frame change default
cap frame drop results
frame create results strL varname mean se ll ul str8 N


* Share of successes by sample-size band, appended to frame results.
*
* post_prop: run -proportion- on an outcome for one subsample and post one row
* to frame results.
*   varname   outcome variable (coded 0/1 in this script)
*   [if]      subsample restriction
*   label()   row label, later used as the x-axis label
* Values are posted positionally in the order:
*   label, estimate, SE, lower bound, upper bound, "N=#".
cap program drop post_prop
program define post_prop
	version 17
	syntax varname(numeric) [if], LABel(string)

	* 83.4% intervals (presumably chosen so that non-overlapping intervals
	* approximate a 5% test of the difference - confirm)
	proportion `varlist' `if', level(83.4)

	tempname est
	mat `est' = r(table)
	* Column 2 is the second outcome category, i.e. the 1s when both 0 and 1
	* occur in the subsample. Rows 1, 2, 5, 6 of r(table) are b, se, ll, ul.
	local mean = `est'[1,2]
	local se = `est'[2,2]
	local ll = `est'[5,2]
	local ul = `est'[6,2]
	* estimation sample size, read directly rather than as residual df + 1
	local N = e(N)

	frame post results ("`label'") (`mean') (`se') (`ll') (`ul') ("N=`N'")
end

* Sample size, study level

	* study level analysis: keep rows with hyp_num==1 on a temporary copy
	preserve
	keep if hyp_num==1

	post_prop successfulexp_insample if samplesize<=1000, label("1000 or less")
	post_prop successfulexp_insample if samplesize>=1001 & samplesize<=2000, label("1001-2000")
	post_prop successfulexp_insample if samplesize>=2001 & samplesize<=3000, label("2001-3000")
	* The open-ended top band must exclude missing sample sizes explicitly:
	* Stata orders missing above every number, so samplesize>=3001 alone
	* would also select rows with no sample size recorded.
	post_prop successfulexp_insample if samplesize>=3001 & !missing(samplesize), label("Over 3000")

	restore

* Sample size, hyp level, all in-sample tests

	post_prop hyp_true if N_person<=1000 & insample==1, label("1000 or less")
	post_prop hyp_true if N_person>=1001 & N_person<=2000 & insample==1, label("1001-2000")
	post_prop hyp_true if N_person>=2001 & N_person<=3000 & insample==1, label("2001-3000")
	* same missing-value guard as for the study-level top band
	post_prop hyp_true if N_person>=3001 & !missing(N_person) & insample==1, label("Over 3000")

* Graph preparation, done inside the results frame

frame change results
* row number doubles as x position: rows 1-4 were posted by the study-level
* analysis, rows 5-8 by the hypothesis-level analysis
generate xlabel = _n
* attach each row's varname text to its x position as a value label
* (labmask is not a built-in command; presumably from SSC package labutil)
labmask xlabel, values(varname)
* constant y position at which the "N=..." text is anchored
generate mlabel = 0.1

		* Study-level panel: rows 1-4 of the results frame, stored as graph "study".
		*   bar     = mean, semi-transparent stc2 fill, no outline
		*   rcap    = capped range between ll and ul
		*   scatter = invisible markers at y = mlabel whose label (N) is drawn
		*             below the marker position (mlabpos(6))
		* xlabel() gives no tick rule, so tick positions are left to Stata's
		* default; valuelabels prints the value label attached to xlabel.
		* scheme(s2mono) overrides the session scheme for this graph only.
		graph twoway ///
		(bar mean xlabel if xlabel<=4, barw(0.8) fc(stc2%50) colordiscrete lc(none)) || ///
		(rcap ll ul xlabel if xlabel<=4,lc(gs7)) ///
		(scatter mlabel xlabel if _n<=4, ///		
msym(none) mlab(N) mlabpos(6) mlabcolor(black) mlabsize(3.4) mlabangle(hor)) ///
		, ///	
			ytitle("Pr(positive result)", size(3.8) margin(r=0 l=-4)) ///
			xlabel( ///
			, labsize(3.5) angle(ver) valuelabels) ///
			xtitle("Study sample size", margin(b=0) size(3.8)) ///
			ylabel(0(0.2)1, labsize(3.8) nogrid) ///
			title("{it:Studies}") ///
			scheme(s2mono) ///
			graphregion(color(white)) ///
			legend(off) ///
			name(study, replace)
	

	
* Hypothesis-level panel: rows 5 and up of the results frame, stored as graph "hyp".
*   bar     = mean, semi-transparent stc1 fill, no outline
*   rcap    = capped range between ll and ul
*   scatter = invisible markers at y = mlabel whose label (N) is drawn
*             below the marker position (mlabpos(6))
* Same layout options as the study-level panel; only the row filter, fill
* colour, axis titles, panel title and graph name differ.
		graph twoway ///
		(bar mean xlabel if xlabel>4, barw(0.8) fc(stc1%50) colordiscrete lc(none)) || ///
		(rcap ll ul xlabel if xlabel>4,lc(gs7)) ///
		(scatter mlabel xlabel if _n>4, ///		
msym(none) mlab(N) mlabpos(6) mlabcolor(black) mlabsize(3.4) mlabangle(hor)) ///
		, ///		
			ytitle("Pr(supported hypothesis)", size(3.8) margin(r=0 l=-4)) ///
			xlabel( ///
			, labsize(3.5) angle(ver) valuelabels) ///
			xtitle("Test sample size", margin(b=0) size(3.8)) ///
			ylabel(0(0.2)1, labsize(3.8) nogrid) ///
			title("{it:Hypotheses}") ///
			scheme(s2mono) ///
			graphregion(color(white)) ///
			legend(off) name(hyp, replace)


				
* Combine the two stored panels (hyp first, then study) into one figure
graph combine hyp study, graphregion(color(white))

* go back to the data frame; the results frame is only needed for plotting
frame change default

* write the combined figure to disk, overwriting an existing file
graph export "../Results/SM-E12-Figure-SamplesizeSuccess.pdf", replace
